# -*- coding: utf-8 -*-

import sys
from tqdm import tqdm
from csv import DictReader
import os, math, itertools
from numpy.random import normal
import datetime
import statsmodels, csv
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from pandas.api.types import CategoricalDtype
from scipy.stats import norm
import subprocess

if 'darwin' in sys.platform:
    print('Running \'caffeinate\' on MacOSX to prevent the system from sleeping')
    subprocess.Popen('caffeinate')

########################################################################

def mean(numbers):
    return float(sum(numbers)) / max(len(numbers), 1)

def ols_cls1(dat_t, RHS_vars):
	if 'latentmeanL2'  in RHS_vars:
		raise Exception('LatentmeanL2 cannot be among the controls for multiple imputation.')
	if len(RHS_vars) >0:form = 'latentmean' + ' ~ cpFlL1 + cpL1 +' + ' + '.join(RHS_vars)
	else:form = 'latentmean' + ' ~ cpFlL1 + cpL1 '
	allVars = ['latentmean', 'ccode', 'cpL1', 'cpFlL1', 'ccode'] + RHS_vars
	dat = dat_t[list(set(allVars))].dropna()
	tsts = dat['ccode'].astype(int)
	dat['ccode'] = pd.Categorical(tsts)
	results = smf.ols(form, data=dat).fit(cov_type='cluster', cov_kwds={'groups': dat['ccode']})
	cpFlL1_est = results.params['cpFlL1']
	cpL1_est = results.params['cpL1']
	cpFlL1_se = results.bse['cpFlL1']
	cpL1_se = results.bse['cpL1']
	return ({'Est': {'cpFlL1': cpFlL1_est, 'cpL1': cpL1_est}, 'Se': {'cpFlL1': cpFlL1_se, 'cpL1': cpL1_se}})

def ols_cls_MI(dat_t, RHS_vars):
	if 'latentmeanL2'  in RHS_vars:
		raise Exception('LatentmeanL2 cannot be among the controls for multiple imputation.')
	if RHS_vars != []:
		form = 'latentmean' + ' ~ cpFlL1 + cpL1 + latentmean_pt1 + ' + ' + '.join(RHS_vars)
	elif RHS_vars == []:
		form = 'latentmean ~ cpFlL1 + cpL1 + latentmean_pt1 '

	cpFl_est = []
	cp_est = []
	cpFl_se = []
	cp_se = []
	for i in range(1, 11):
		dat_t['latentmean_pt1'] = dat_t['latentmean_post_' + str(i) + '_L2']
		allVars = ['latentmean', 'ccode', 'cpL1', 'cpFlL1', 'latentmean_pt1', 'ccode'] + RHS_vars
		dat = dat_t[list(set(allVars))].dropna()
		tsts = dat['ccode'].astype(int)
		dat['ccode'] = pd.Categorical(tsts)
		results = smf.ols(form, data=dat).fit(cov_type='cluster', cov_kwds={'groups': dat['ccode']})
		cpFl_est.append(results.params['cpFlL1'])
		cp_est.append(results.params['cpL1'])
		cpFl_se.append(results.bse['cpFlL1'])
		cp_se.append(results.bse['cpL1'])

	q_cpFl = mean(cpFl_est)
	q_cp = mean(cp_est)

	smplVar_cpFl = sum([(x - q_cpFl)**2. for x in cpFl_est])/9.
	smplVar_cp = sum([(x - q_cp)**2. for x in cp_est])/9.

	avgVar_cpFl = mean([x**2. for x in cpFl_se])
	avgVar_cp = mean([x**2. for x in cp_se])

	SE_sqrd_cpFl = avgVar_cpFl + smplVar_cpFl*(1. + 1./10.)
	SE_sqrd_cp = avgVar_cp + smplVar_cp*(1. + 1./10.)

	return ({'Est': {'cpFlL1': q_cpFl, 'cpL1': q_cp}, 'Se': {'cpFlL1': math.sqrt(SE_sqrd_cpFl), 'cpL1': math.sqrt(SE_sqrd_cp)}})


ebaVars = [
	 'e_migdppclnL2',
	 'e_migdpgroL2',
	 'ln_rexportspgdpL2',
	 'oil_binaryL2',
	 'ethfracL2',
	 'ln_tpopL2',
	 'tpopgroL2',
	 'cheibubDemoL2',
	 'cheibubPresidL2',
	 'cheibubMilL2',
	 'gwf_partyL2',
	 'gwf_monarchL2',
	 'gwf_personalL2',
	 'cheibubLegisL2',
	 'ln_milperpcL2',
	 'ln_milperL2',
	 'ln_rCOWmilexL2',
	 'ln_CowmilexpgdpL2',
	 'ln_rCOWmilexpcL2',
	 'ln_rCOWmilexpsoldL2',
	 'cbd_changeCowMilexL2',
	 'effectivenumberL2',
	 'paramilitary_bscL2',
	 'counterbal_pthL2',
	 'counterbalancingL2',
	 'warL2',
	 'CW',
	 'intra_warL2',
	 'prioL2',
	 'OppNonViolBanksL2',
	 'ncpL1',
	 'ncpFlL1',
	 'purgesL2',
	 'latentmeanL2',#,
	 'iccprL2', #iccpr signatory
	 'ComLaw', #common law
	 'ythblgapL2', #youth population
	 'xconstL2' #executive constraints (polity IV)
	 ]

working_dir = os.path.dirname(os.path.realpath(__file__))

keepvars = ebaVars + ['latentmean', 'cpFlL1', 'cpL1', 'ccode']
for i in range(1, 11):
	keepvars += ['latentmean_post_' + str(i) + '_L2']

dat_t = []

dat_t = pd.read_csv(working_dir + '/ts_bsc.csv').filter(items = keepvars)
dat_t['ccode'] = dat_t['ccode'].apply(str)

path_out = working_dir + '/eba.out.bscA/'
fldNms = ['ID', 'Coef', 'SE', 'tVal', 'pVal']

for k in range(0, 7):
	print(k)

	print ('Starting regressions of length ' + str(k))

	j = 1
	rnd = 0
	fileFl = open(path_out + 'FailedCoupsEBA_' + str(k) +'_' +  str(rnd) + '.csv', 'w')
	fileSc = open(path_out + 'SuccCoupsEBA_' + str(k) +'_' +  str(rnd) + '.csv', 'w')
	fileId = open(path_out + 'ID_Refs_' + str(k) + '_' + str(rnd) + '.csv', 'w')

	writerFl = csv.writer(fileFl, delimiter=",")
	writerFl.writerow(fldNms)
	writerSc = csv.writer(fileSc, delimiter=",")
	writerSc.writerow(fldNms)
	writerId = csv.writer(fileId, delimiter=",")
	writerId.writerow(['ID', 'Formula'])

	cmbnations = list(itertools.combinations(ebaVars, k))
	last_round = -9
	for i in tqdm(range(len(cmbnations)), desc = 'Progress for length ' + str(k), unit_divisor=1000, miniters=int(1000/100)):
		the_vars = cmbnations[i]
		ID = 'K_' + str(k) + "_" + str(i+1)
		RHS_vars = [x for x in the_vars]

		EstFl = None
		SeFl = None
		tValFl = None

		pValFl = None
		EstSc = None
		SeSc = None
		tValSc = None
		pValSc = None
		#try:
		if 'latentmeanL2' not in RHS_vars:
			rslts = ols_cls1(dat_t, RHS_vars)

		else:
			RHS_vars2 = [x for x in RHS_vars if x != 'latentmeanL2']
			rslts = ols_cls_MI(dat_t, RHS_vars2)
		#except:
		#	print('OLS did not work for variables ' + ', '.join(RHS_vars))

		try:
			EstFl = rslts['Est']['cpFlL1']
			SeFl = rslts['Se']['cpFlL1']
			tValFl = EstFl/SeFl
			pValFl = '{0:.10f}'.format(2.*(1.-norm.cdf(abs(tValFl))))
		except:
			pass

		try:
			EstSc = rslts['Est']['cpL1']
			SeSc = rslts['Se']['cpL1']
			tValSc = EstSc/SeSc
			pValSc = 2.*(1.-norm.cdf(abs(tValSc)))
		except:
			pass

		row_cpFl = [ID,
				EstFl,
				SeFl,
				tValFl,
				pValFl
				]

		row_cp = [ID,
				EstSc,
				SeSc,
				tValSc,
				pValSc
				]

		rnd = round(i/100000)
		if last_round != rnd and last_round != -9:
			fileFl.close()
			fileSc.close()
			fileId.close()

			fileFl = open(path_out + 'FailedCoupsEBA_' + str(k) + '_' + str(rnd) + '.csv', 'w')
			fileSc = open(path_out + 'SuccCoupsEBA_' + str(k) + '_' + str(rnd) + '.csv', 'w')
			fileId = open(path_out + 'ID_Refs_' + str(k) + '_' + str(rnd) + '.csv', 'w')

			writerFl = csv.writer(fileFl, delimiter=",")
			writerFl.writerow(fldNms)
			writerSc = csv.writer(fileSc, delimiter=",")
			writerSc.writerow(fldNms)
			writerId = csv.writer(fileId, delimiter=",")
			writerId.writerow(['ID', 'Formula'])

		writerFl.writerow(row_cpFl)
		writerSc.writerow(row_cp)
		writerId.writerow([ID, 'latentmean ~ cpFl + cp + ' + ' + '.join(RHS_vars)])
	i += 1
	last_round = round(i/100000)
fileFl.close()
fileSc.close()
fileId.close()


